# 首先，我们需要导入必要的库
import pandas as pd
from scipy import stats

# Load the insurance dataset into a DataFrame.
df = pd.read_csv('insurance.csv')

# Print summary statistics of the charges column to get a feel for its
# distribution before testing.
print(df['charges'].describe())

# Split the charges by smoking status. A single .loc call selects rows and
# column together instead of chained indexing.
smokers = df.loc[df['smoker'] == 'yes', 'charges']
non_smokers = df.loc[df['smoker'] == 'no', 'charges']

# Test whether mean charges differ significantly between the two groups.
# Welch's t-test (equal_var=False) is used because nothing in this script
# verifies that the groups share a common variance or have equal sizes; the
# default pooled-variance test can give misleading p-values in that case.
t_statistic, p_value = stats.ttest_ind(smokers, non_smokers, equal_var=False)

print("t statistic:", t_statistic)
print("p-value:", p_value)